library(foreign)
library(effects)
library(nnet)
library(MASS)
library(dplyr)
library(tidyverse)
library(plyr) 
library(haven)
library(simcf)
library(ggplot2)
## Penalise scientific notation heavily so that numbers are printed
## (and written by cat) in fixed notation wherever possible.
options(scipen = 20)


## Attach the saved "vbf_output" file, but only when no entry on the
## search path already mentions it (i.e. it has not been attached yet).
vbf_is_attached <- any(grepl("vbf_output", search()))
if (!vbf_is_attached) {
  cat ("Attaching vbf... this may take a few moments...")
  attach("vbf_output")
}

########################################################
################# MATCHING ANALYSIS ####################
########################################################

# HW NOTE: Per reviewers, recode party so that independents are included
# and recode race so that Latinos are included.

# Registration codes that map to one of the three analysed parties.  Every
# other code, and a missing code, is recoded to "other".
party_cd_to_label <- c(UNA = "ind", DEM = "dem", REP = "rep")

vbf <- vbf %>%
  mutate(
    party = ifelse(party_cd %in% names(party_cd_to_label),
                   party_cd_to_label[as.character(party_cd)],
                   "other"),
    # Latino ethnicity takes precedence over race_code; after that only
    # race codes "B" and "W" are kept and everything else becomes NA.
    # NOTE(review): unlike party, a missing ethnicity yields NA here even
    # when race_code is "B" or "W" -- confirm that this is intended.
    race = ifelse(ethnicity == "latino_yes",
                  "latino",
                  ifelse(race_code == "B",
                         "black",
                         ifelse(race_code == "W", "white", NA)))
  )

## Ways in which county borders can be ranked.  Each entry names a column
## of the border table read below.
measures2use <- c(
  "totalhoursDID",
  "sitesDID",
  "evehoursDID",
  "sathoursDID",
  "sunhoursDID"
)

## Subgroups and 2012 outcome categories that the tests iterate over.
races2use <- c("black", "white", "latino")
parties2use <- c("dem", "rep", "ind")
votetypes2use <- c("abs2012", "ed2012", "eip2012", "novote2012")

## Read all border pairings.  Despite its .csv extension the file is parsed
## as tab-delimited, with the first line supplying the column names.
borders2use <- read_delim("county-early-voting-differences.csv",
                          delim = "\t",
                          col_names = TRUE)
cat("Read ", nrow(borders2use), " county borders in North Carolina.\n",
    sep = "")

## Print the header line of the test output file.  Note that this erases
## the file if it already exists (cat is called without append = TRUE).
##
## The header must go to the same file that the matching loop appends its
## result rows to ("tests_output_lat&ind_R&R.txt"); otherwise the results
## file has no header and keeps accumulating rows from earlier runs.
##
## Columns, in order:
##   border_measure        measure used to rank the county borders
##   border_measure_value  value of that measure for this border
##   borderrank            rank of the border under that measure
##   test_type             which conditional proportion was tested
##   race, party           subgroup of registered voters
##   county1, county2      names of the two counties
##   value1, value2        EIP statistic used to rank borders, per county
##   proportion1/2         estimated proportions reported by the test
##   test_statistic        statistic of the difference-in-proportions test
##   p_value               p-value of that test
##
## NOTE(review): the matching loop passes the control county (county2)
## to prop.test first, so proportion1 appears to be county2's estimate and
## proportion2 county1's -- confirm before interpreting these columns.
header_columns <- c(
  "border_measure",
  "border_measure_value",
  "borderrank",
  "test_type",
  "race",
  "party",
  "county1",
  "county2",
  "value1",
  "value2",
  "proportion1",
  "proportion2",
  "test_statistic",
  "p_value"
)

cat(paste(header_columns, collapse = "\t"),
    "\n",
    file = "tests_output_lat&ind_R&R.txt",
    sep = "")

## ------------------------------------------------------------------
## Border-by-border matching tests.
##
## For every ranking measure, every county border, and every
## race x party subgroup, voters on either side of the border are
## cross-tabulated by their 2012 and 2016 outcomes, the table is collapsed
## to vote / no-vote, and two difference-in-proportions tests are run
## (control county vs. treatment county).  One row per test is appended to
## tests_output_file.
## ------------------------------------------------------------------

## File receiving one tab-separated row per test.  Rows are appended.
tests_output_file <- "tests_output_lat&ind_R&R.txt"

## A test is only run when both counties have at least this many voters
## in the conditioning group.
min_group_size <- 20

## 2016 outcome categories that are counted for each 2012 category.
votetypes2016 <- c("abs2016", "ed2016", "eip2016", "novote2016")

## The two tests run for each subgroup, in the order they are reported.
## `given` / `outcome` are the row / column of the 2x2 table being tested;
## the remaining fields are the exact texts printed or written.
turnout_tests <- list(
  list(given = "novote2012",
       outcome = "novote2016",
       test_type = "change_distr_novote2016_given_novote2012",
       announce = "Changes in probability of novote2016 given novote2012.\n",
       too_small = "No test for changes in distribution of novote2012 (n too small).\n"),
  list(given = "vote2012",
       outcome = "vote2016",
       test_type = "change_distr_vote2016_given_vote2012",
       announce = "Changes in distribution of vote2016 given vote2012.\n",
       too_small = "No test for changes in distribution of vote2012 (n too small).\n")
)

## Build the filter expression (as text, for filter_) that selects voters
## who were ACTIVE in both 2012 and 2016, live in `home_county`, have a
## TRUE value in the column `borders_<neighbor_county>`, and belong to the
## given race and party.
build_voter_query <- function(home_county, neighbor_county, race, party) {
  paste0("voter_status_desc_2012 == \"ACTIVE\"",
         " & voter_status_desc_2016 == \"ACTIVE\"",
         " & COUNTY_NAM == \"", home_county, "\"",
         " & borders_", neighbor_county, " == TRUE",
         " & race == \"", race, "\"",
         " & party == \"", party, "\"")
}

## Cross-tabulate voters by 2012 outcome (rows) and 2016 outcome (columns).
## Returns an integer matrix with dimnames (outcomes2012, outcomes2016).
## Counting is done in base R so the result does not depend on whether
## plyr's or dplyr's summarise is the one found on the search path.
tabulate_transitions <- function(voters, outcomes2012, outcomes2016) {
  counts <- matrix(0L,
                   nrow = length(outcomes2012),
                   ncol = length(outcomes2016),
                   dimnames = list(outcomes2012, outcomes2016))
  for (from in outcomes2012) {
    ## which() drops rows with a missing 2012 outcome, as filtering does
    in_row <- which(voters$outcome2012 == from)
    for (to in outcomes2016) {
      counts[from, to] <- sum(voters$outcome2016[in_row] == to)
    }
  }
  counts
}

## Collapse a full transition table to a 2x2 vote / no-vote table with
## rows (novote2012, vote2012) and columns (novote2016, vote2016).  Every
## row other than "novote2012" counts as having voted in 2012, and every
## column other than "novote2016" as having voted in 2016.
collapse_to_turnout <- function(counts) {
  voted_rows <- setdiff(rownames(counts), "novote2012")
  voted_cols <- setdiff(colnames(counts), "novote2016")
  matrix(c(counts["novote2012", "novote2016"],
           sum(counts["novote2012", voted_cols]),
           sum(counts[voted_rows, "novote2016"]),
           sum(counts[voted_rows, voted_cols])),
         nrow = 2, ncol = 2, byrow = TRUE,
         dimnames = list(c("novote2012", "vote2012"),
                         c("novote2016", "vote2016")))
}

## Append one tab-separated result row to `file`.  The values are handed
## to cat individually (rather than pasted together first) so numbers are
## formatted by cat itself.
append_test_row <- function(file, measure, measure_value, borderrank,
                            test_type, race, party, county1, county2,
                            value1, value2, test_output) {
  cat(measure, "\t",
      measure_value, "\t",
      borderrank, "\t",
      test_type, "\t",
      race, "\t",
      party, "\t",
      county1, "\t",
      county2, "\t",
      value1, "\t",
      value2, "\t",
      test_output$estimate[1], "\t",
      test_output$estimate[2], "\t",
      test_output$statistic, "\t",
      test_output$p.value,
      "\n",
      file = file,
      sep = "",
      append = TRUE)
}

## Loop over measures for ranking county borders
for (measure2use in measures2use) {

  cat("\n**** Border measure: ", measure2use, "\n\n", sep = "")

  ## Each "<stem>DID" measure has matching per-county change columns
  ## "<stem>Chg1" and "<stem>Chg2" in the border table.
  change_cols <- paste0(sub("DID$", "Chg", measure2use), c("1", "2"))

  if (!(measure2use %in% names(borders2use))) {
    stop("Error in measure2use.  Do not know how to sort county borders.\n")
  }
  if (!all(change_cols %in% names(borders2use))) {
    stop("Error in measure2use.  Cannot assign value1 and value2.\n")
  }

  ## Order the borders by the current measure, largest first.  borders2use
  ## is re-sorted in place, so ties keep their order from the previous sort.
  rank_order <- order(as.double(borders2use[[measure2use]]), decreasing = TRUE)
  borders2use <- borders2use[rank_order, ]

  ## Loop over county borders (control/treatment pairs of counties).
  ## seq_len() makes this a no-op when the border table is empty.
  for (borderrank in seq_len(nrow(borders2use))) {
    ## treatment county: county1
    ## control county: county2
    ## Note that considering counties 1 and 2 treatment and control,
    ## respectively, is WLOG
    treatment_county <- borders2use$county1[borderrank]
    control_county <- borders2use$county2[borderrank]

    cat("Working on ranked border ", borderrank, " of ", nrow(borders2use),
        ".\n", sep = "")
    cat("\tCounty 1: ", treatment_county,
        "\n\tCounty 2: ", control_county, "\n", sep = "")

    measure_value <- borders2use[[measure2use]][borderrank]
    value1 <- borders2use[[change_cols[1]]][borderrank]
    value2 <- borders2use[[change_cols[2]]][borderrank]

    ## Loop over registered voter race
    for (race2use in races2use) {
      ## Loop over registered voter party
      for (party2use in parties2use) {
        cat("------\nRace: ", race2use, "\nParty: ", party2use, "\n",
            sep = "")

        ## Select the subgroup's voters on each side of the border.
        ## NOTE(review): filter_ with a text expression is deprecated in
        ## current dplyr; it is kept here to avoid changing which dplyr
        ## versions the script runs on.
        text_treatment <- build_voter_query(treatment_county, control_county,
                                            race2use, party2use)
        text_control <- build_voter_query(control_county, treatment_county,
                                          race2use, party2use)
        vbf_treatment <- vbf %>% filter_(text_treatment)
        vbf_control <- vbf %>% filter_(text_control)

        ## 4x4 transition tables (2012 outcome x 2016 outcome), one for
        ## the treatment county (county 1) and one for the control county
        ## (county 2); both carry 2016 outcome names on their columns.
        matrix_treatment <- tabulate_transitions(vbf_treatment,
                                                 votetypes2use, votetypes2016)
        matrix_control <- tabulate_transitions(vbf_control,
                                               votetypes2use, votetypes2016)

        ## Aggregate to the corresponding 2x2 matrices (vote / no-vote).
        matrix_treatment_small <- collapse_to_turnout(matrix_treatment)
        matrix_control_small <- collapse_to_turnout(matrix_control)

        for (test_spec in turnout_tests) {
          ## Control county first, treatment county second.
          ## NOTE(review): estimate[1] is therefore the control county
          ## (county2), yet it is written under "proportion1" -- confirm
          ## how downstream code reads proportion1 / proportion2.
          x <- c(matrix_control_small[test_spec$given, test_spec$outcome],
                 matrix_treatment_small[test_spec$given, test_spec$outcome])
          n <- c(sum(matrix_control_small[test_spec$given, ]),
                 sum(matrix_treatment_small[test_spec$given, ]))

          if (min(n) >= min_group_size) {
            cat(test_spec$announce)
            test_output <- prop.test(x = x, n = n)
            print(test_output)
            append_test_row(file = tests_output_file,
                            measure = measure2use,
                            measure_value = measure_value,
                            borderrank = borderrank,
                            test_type = test_spec$test_type,
                            race = race2use,
                            party = party2use,
                            county1 = treatment_county,
                            county2 = control_county,
                            value1 = value1,
                            value2 = value2,
                            test_output = test_output)
          } else {
            cat(test_spec$too_small)
          }
        }
      }
    }
  }
}


## sink()
